Introduction
In this script, the relative fraction of different cell types is evaluated between survival groups in the different anatomical areas.
Load data
df_data <- read.csv2(file.path(wd, 'input_data', 'expression_data.csv'), stringsAsFactors = FALSE)
df_annotations <- read.csv2(file.path(wd, 'input_data', 'cell_annotations.csv'), stringsAsFactors = FALSE)
df_meta <- read.csv2(file.path(wd, 'input_data', 'clinical_data.csv'), stringsAsFactors = FALSE)
df_areas <- read.csv2(file.path(wd, 'intermediate_results', 'anatomical_areas.csv'), stringsAsFactors = FALSE)Immune fraction
Relative fraction of immune cells across regions
round_base <- function(x, base = 50) {
return(round(x / base) * base)
}
# First we make the squares of 50*50 microns.
df_data <- df_data %>%
mutate(X2 = round_base(X, 50), Y2 = round_base(Y, 50))
level_data <- df_annotations %>%
filter(level == 'first_level') %>%
left_join(df_data %>% select(slide_id, scene_id, OID, X, Y, X2, Y2)) %>%
left_join(df_meta) %>%
left_join(df_areas)
immune_cells <- c("MF", "MG", "Tcyt", "Tdn", "B cell", "DC", "Monocyte", "MDSC", "Treg", "Th")
df_landscape <- level_data %>%
mutate(CellType = ifelse(CellType %in% immune_cells, 'immune', 'other')) %>%
group_by(survival_group, couple_id, patient_id, region, CellType) %>%
summarise(N = n()) %>%
ungroup() %>%
spread(CellType, N, fill = 0) %>%
gather(CellType, N, -survival_group, -couple_id, -patient_id, -region) %>%
group_by(survival_group, patient_id, couple_id, region) %>%
mutate(M = sum(N)) %>%
ungroup() %>%
mutate(P = 100*N/M)
ggplot(df_landscape, aes(paste(survival_group, patient_id), P, fill = CellType)) + geom_col() + theme_bw() + facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + theme_bw() + theme(legend.position = 'bottom') + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylab('Cell Percentage (%)') + xlab('patient identifier') + ggtitle('immune level')stat.test <- df_landscape %>%
mutate(survival_group = survival_group == 'eLTS') %>%
group_by(region, CellType) %>%
summarise(average_eLTS = mean(P[survival_group == TRUE]), average_STS = mean(P[survival_group == FALSE]), pVal = summary(clogit(survival_group ~ P + strata(couple_id)))$coefficients[5]) %>%
ungroup()
DT::datatable(stat.test, filter = 'top')ggboxplot(df_landscape, x = "survival_group", y = "P", fill = "survival_group") +
theme_bw() +
geom_jitter(alpha = 0.2) +
scale_fill_manual(values = c('royalblue', 'goldenrod')) +
theme(legend.position = 'none') +
xlab(NULL) + ylab('Relative Cell Fraction (%)') +
facet_grid(CellType~region, scales = 'free_y')tmp_plot <- df_landscape %>%
group_by(couple_id, survival_group, CellType, region) %>%
summarise(P = mean(P)) %>%
ungroup() %>%
mutate(survival_group = plyr::mapvalues(survival_group, from = c('STS', 'eLTS'), to = c(1, 2)))
b <- runif(nrow(tmp_plot), -0.1, 0.1)
ggplot(tmp_plot) +
geom_boxplot(aes(x = as.numeric(survival_group), y = P, group = survival_group, fill = survival_group)) +
geom_point(aes(x = as.numeric(survival_group) + b, y = P), alpha = 0.2) +
geom_line(aes(x = as.numeric(survival_group) + b, y = P, group = couple_id), alpha = 0.2) +
scale_x_continuous(breaks = c(1,2), labels = c("STS", "eLTS"))+
xlab("survival_group") + facet_grid(CellType~region, scales = 'free_y') +
scale_fill_manual(values = c('royalblue', 'goldenrod')) +
theme_bw()tmp_plot <- tmp_plot %>%
mutate(survival_group = plyr::mapvalues(survival_group, from = c(1,2), to = c('STS', 'eLTS'))) %>%
group_by(couple_id) %>%
mutate(QC = n_distinct(survival_group)) %>%
ungroup() %>%
filter(QC == max(QC)) %>%
select(-QC) %>%
spread(survival_group, P, fill = 0) %>%
mutate(delta = eLTS-STS) %>%
group_by(CellType, region) %>%
arrange(-delta) %>%
mutate(N = 1:n()) %>%
ungroup() %>%
group_by(CellType, region) %>%
mutate(label_col = delta/max(abs(delta))) %>%
ungroup()
ggplot(tmp_plot, aes(N, delta, fill = label_col)) +
geom_col() +
theme_bw() +
facet_grid(CellType~region, scales = 'free') +
theme_bw() +
xlab('patient ranking') +
scale_fill_gradient2(low = 'royalblue', high = 'firebrick', mid = 'white', midpoint = 0)tmp_plot <- tmp_plot %>%
mutate(ID = paste(CellType, region, sep = '_')) %>%
select(couple_id, ID, delta) %>%
spread(ID, delta, fill = 0)
tmp_matrix <- tmp_plot %>% select(-couple_id) %>% as.matrix()
rownames(tmp_matrix) <- tmp_plot$couple_id
pheatmap(tmp_matrix)All levels
We are going to iterate through all the levels and do the analysis per level per region. At each level we compare relative expression, for example: - MF relative to all cells - M2 MF relative to all MF
for(i in unique(df_annotations$level)){
print(i)
level_data <- df_annotations %>%
filter(level == i) %>%
left_join(df_data %>% select(slide_id, scene_id, OID, X, Y, X2, Y2)) %>%
left_join(df_meta) %>%
left_join(df_areas)
df_landscape <- level_data %>%
group_by(survival_group, couple_id, patient_id, region, CellType) %>%
summarise(N = n()) %>%
ungroup() %>%
spread(CellType, N, fill = 0) %>%
gather(CellType, N, -survival_group, -couple_id, -patient_id, -region) %>%
group_by(survival_group, patient_id, couple_id, region) %>%
mutate(M = sum(N)) %>%
ungroup() %>%
mutate(P = 100*N/M)
print(ggplot(df_landscape, aes(paste(survival_group, patient_id), P, fill = CellType)) + geom_col() + theme_bw() + facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') + theme_bw() + theme(legend.position = 'bottom') + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) + ylab('Cell Percentage (%)') + xlab('patient identifier') + ggtitle(i))
if(i == 'first_level'){
tmp_plot <- df_landscape %>%
mutate(couple_id = plyr::mapvalues(couple_id,
from = c('B', 'C', 'E', 'F', 'G', 'I', 'J', 'K', 'L', 'M', 'N', 'P'),
to = paste0('P', str_pad(c(1:n_distinct(couple_id)), width = 2, pad = '0'))))
print(ggplot(tmp_plot, aes(paste(survival_group, patient_id), P, fill = CellType)) +
geom_col() +
theme_bw() +
facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') +
theme_bw() +
theme(legend.position = 'bottom') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab('Cell Percentage (%)') +
xlab('Patient') +
ggtitle(i) +
scale_fill_manual(values = c('B cell'= rgb(1, 0.64, 0), 'NOS' = rgb(0.33, 0.33, 0.33), 'Tcyt' = rgb(0, 0.2, 0.4), 'DC' = rgb(0.51, 0.26, 0), 'MDSC' = rgb(1, 0.71, 0.76), 'Monocyte' = rgb(0.3, 1, 0.3), 'Th' = rgb(0.15, 0.4, 0.8), 'Treg' = rgb(0.3, 0.56, 1), 'Tumor' = rgb(1, 0, 0), 'MF' = rgb(0.1, 0.5, 0.1), 'MG' = rgb(0.25, 0.7, 0.25), 'Tdn' = rgb(0, 0, 0.2))) + scale_x_discrete(labels = function(x) gsub(" .*", "", x)))
df_landscape <- df_landscape %>%
filter(CellType != 'NOS') %>%
group_by(survival_group, patient_id, couple_id, region) %>%
mutate(M = sum(N)) %>%
ungroup() %>%
mutate(P = 100*N/M)
tmp_plot <- df_landscape %>%
mutate(couple_id = plyr::mapvalues(couple_id,
from = c('B', 'C', 'E', 'F', 'G', 'I', 'J', 'K', 'L', 'M', 'N', 'P'),
to = paste0('P', str_pad(c(1:n_distinct(couple_id)), width = 2, pad = '0'))))
print(ggplot(tmp_plot, aes(paste(survival_group, patient_id), P, fill = CellType)) +
geom_col() +
theme_bw() +
facet_grid(region~couple_id, scales = 'free_x', space = 'free_x') +
theme_bw() +
theme(legend.position = 'bottom') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
ylab('Cell Percentage (%)') +
xlab('Patient') +
ggtitle(i) +
scale_fill_manual(values = c('B cell'= rgb(1, 0.64, 0), 'Tcyt' = rgb(0, 0.2, 0.4), 'DC' = rgb(0.51, 0.26, 0), 'MDSC' = rgb(1, 0.71, 0.76), 'Monocyte' = rgb(0.3, 1, 0.3), 'Th' = rgb(0.15, 0.4, 0.8), 'Treg' = rgb(0.3, 0.56, 1), 'Tumor' = rgb(1, 0, 0), 'MF' = rgb(0.1, 0.5, 0.1), 'MG' = rgb(0.25, 0.7, 0.25), 'Tdn' = rgb(0, 0, 0.2))) + scale_x_discrete(labels = function(x) gsub(" .*", "", x)))
}
stat.test <- df_landscape %>%
mutate(survival_group = survival_group == 'eLTS') %>%
group_by(region, CellType) %>%
summarise(average_eLTS = mean(P[survival_group == TRUE]), average_STS = mean(P[survival_group == FALSE]), pVal = summary(clogit(survival_group ~ P + strata(couple_id)))$coefficients[5]) %>%
ungroup()
DT::datatable(stat.test, filter = 'top')
print(ggboxplot(df_landscape, x = "survival_group", y = "P", fill = "survival_group") + #, palette = "jco", # ylim = c(0, 40)
theme_bw() +
geom_jitter(alpha = 0.2) +
scale_fill_manual(values = c('royalblue', 'goldenrod')) +
theme(legend.position = 'none') +
xlab(NULL) + ylab('Relative Cell Fraction (%)') +
facet_grid(CellType~region, scales = 'free_y') +
ggtitle(i))
# Deltas
tmp_plot <- df_landscape %>%
group_by(couple_id, survival_group, CellType, region) %>%
summarise(P = mean(P)) %>%
ungroup() %>%
mutate(survival_group = plyr::mapvalues(survival_group, from = c('STS', 'eLTS'), to = c(1, 2)))
b <- runif(nrow(tmp_plot), -0.1, 0.1)
print(ggplot(tmp_plot) +
geom_boxplot(aes(x = as.numeric(survival_group), y = P, group = survival_group, fill = survival_group)) +
geom_point(aes(x = as.numeric(survival_group) + b, y = P), alpha = 0.2) +
geom_line(aes(x = as.numeric(survival_group) + b, y = P, group = couple_id), alpha = 0.2) +
scale_x_continuous(breaks = c(1,2), labels = c("STS", "eLTS"))+
xlab("survival_group") + facet_grid(CellType~region, scales = 'free_y') +
scale_fill_manual(values = c('royalblue', 'goldenrod')) +
theme_bw())
tmp_plot <- tmp_plot %>%
mutate(survival_group = plyr::mapvalues(survival_group, from = c(1,2), to = c('STS', 'eLTS'))) %>%
group_by(couple_id) %>%
mutate(QC = n_distinct(survival_group)) %>%
ungroup() %>%
filter(QC == max(QC)) %>%
select(-QC) %>%
spread(survival_group, P, fill = 0) %>%
mutate(delta = eLTS-STS) %>%
group_by(CellType, region) %>%
arrange(-delta) %>%
mutate(N = 1:n()) %>%
ungroup() %>%
group_by(CellType, region) %>%
mutate(label_col = delta/max(abs(delta))) %>%
ungroup()
print(ggplot(tmp_plot, aes(N, delta, fill = label_col)) + geom_col() + theme_bw() + facet_grid(CellType~region, scales = 'free') + theme_bw() + scale_fill_gradient2(low = 'royalblue', high = 'firebrick', mid = 'white', midpoint = 0) + xlab('patient ranking'))
tmp_plot <- tmp_plot %>%
mutate(ID = paste(CellType, region, sep = '_')) %>%
select(couple_id, ID, delta) %>%
spread(ID, delta, fill = 0)
tmp_matrix <- tmp_plot %>% select(-couple_id) %>% as.matrix()
rownames(tmp_matrix) <- tmp_plot$couple_id
pheatmap(tmp_matrix)
norm_matrix <- scale(tmp_matrix)
norm_matrix[is.na(norm_matrix)] <- 0
pheatmap(norm_matrix)
if(i == 'first_level'){
tmp_plot <- df_landscape %>%
filter(CellType %in% c('MF', 'MG')) %>%
select(patient_id, survival_group, region, CellType, P) %>%
spread(CellType, P, fill = 0) %>%
mutate(survival_group = factor(survival_group, levels = c('STS', 'eLTS')))
print(ggplot(tmp_plot, aes(MF, MG, color = survival_group)) +
geom_point() +
theme_bw() +
facet_wrap(~region, scales = 'free') +
geom_smooth(method = 'lm') +
stat_cor() +
scale_color_manual(values = c('royalblue', 'goldenrod')) +
xlab('Relative Macrophage Percentage (%)') +
ylab('Relative Microglia Percentage (%)'))
tmp_plot <- df_landscape %>%
filter(CellType %in% c('MF', 'MG')) %>%
select(patient_id, survival_group, region, CellType, P) %>%
spread(CellType, P, fill = 0) %>%
mutate(ratio = log2(MF/MG)) %>%
group_by(region) %>%
mutate(tmp_rank = rank(ratio)) %>%
mutate(survival_group = factor(survival_group, levels = c('STS', 'eLTS')))
print(ggplot(tmp_plot, aes(tmp_rank, ratio, fill = survival_group)) +
geom_col() +
theme_bw() +
facet_wrap(~region, scales = 'free', nrow = 1) +
scale_fill_manual(values = c('royalblue', 'goldenrod')) +
xlab('Patient ranking') +
ylab('Macrophages Microglia log2 ratio') +
theme(legend.position = 'bottom'))
}
if(i %in% c('first_level', 'gbm_neftel')){
df_landscape <- level_data %>%
group_by(survival_group, couple_id, patient_id, treated, number_resections, slide_id, scene_id, region, CellType) %>%
summarise(N = n()) %>%
ungroup() %>%
spread(CellType, N, fill = 0) %>%
gather(CellType, N, -survival_group, -couple_id, -patient_id, -treated, -number_resections, -slide_id, -scene_id, -region) %>%
group_by(survival_group, patient_id, treated, number_resections, slide_id, scene_id, couple_id, region) %>%
mutate(M = sum(N)) %>%
ungroup() %>%
mutate(P = 100*N/M)
tmp_table <- df_landscape %>%
group_by(survival_group, patient_id, couple_id, treated, number_resections, region, CellType) %>%
summarise(average_fraction = mean(P), sd_fraction = sd(P)) %>%
ungroup()
DT::datatable(tmp_table, filter = 'top')
for(j in unique(tmp_table$region)){
print(ggplot(tmp_table %>% filter(region == j), aes(paste0('R', number_resections), average_fraction, fill = CellType)) +
geom_col() + geom_errorbar(aes(ymin = average_fraction-sd_fraction, ymax = average_fraction + sd_fraction)) +
theme_bw() +
facet_grid(CellType~survival_group+couple_id+patient_id+treated, scales = 'free', space = 'free_x', switch = 'y') +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
theme(legend.position = 'none') +
ggtitle(paste(i, j)) +
xlab('Resection ID') + ylab('Average Cell Percentage (%)') +
theme(strip.text.y.left = element_text(angle = 0)))
}
}
}## [1] "first_level"
## [1] "Th_subtypes"
## [1] "Microglia_subtypes"
## [1] "gbm_neftel"
## [1] "Tcy_activation"
## [1] "Tcy_subtypes"
## [1] "Mf_subtypes"
## [1] "gbm_stem"
## [1] "DC_subtypes"